home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
listings
/
v_12_02
/
otto
/
bzyatg.c
< prev
next >
Wrap
C/C++ Source or Header
|
1993-12-01
|
6KB
|
278 lines
/*********************************************
*
* Fast String Search, using shift or algorithm
*
* July 1993 by Erick Otto.
*
* Algorithms from Beaza-Yates and Gonnet
* are used in this code.
*
*********************************************/
#include <stdio.h>
#include <fcntl.h>
#include <ctype.h>
#define WORD 32 /* # of bits in an Uint */
#define B 1 /* # of bits to shift */
#define MAXSYM 256 /* alphabet size */
unsigned int stopmask;
unsigned int T[MAXSYM];
#define TBUF (8*BUFSIZ) /* X times system buffer */
#define MAXPAT WORD
main(argc, argv)
int argc;
char **argv;
{
char buf[TBUF+2];
char match[MAXPAT];
register int fd, nread;
register char *beg, *lastnl, *end;
char filename[100];
char first;
char build();
/* must be at least two arguments */
if (argc < 2 ) {
fprintf(stderr,"\n\tMatch string expected\n");
exit(1);
}
if (argc < 3 ) {
fprintf(stderr,"\n\tFile expected\n");
exit(1);
}
strcpy(match, argv[1]);
strcpy(filename, argv[2]);
first = build(match);
if ( (fd = open(filename,O_RDONLY)) == -1) {
fprintf(stderr,
"Can't open file %s\n",filename);
perror(filename);
exit(1);
}
*buf = '\n'; /* sentinel for printing purposes */
beg = buf+1; /* start filling buffer at buf+1 */
while((nread=read(fd,beg,(&buf[TBUF]-beg))) > 0){
end = beg+nread;
lastnl = end;
while(*--lastnl != '\n')
;
/* lastnl points to the newline */
matchpat(buf+1, lastnl-buf,first);
/* move the part skipped (from newline */
/* to end of buffer) back to the begin */
/* of the buffer. */
memcpy(buf+1, lastnl, end-lastnl-1);
beg = buf+1 + (end-lastnl);
}
close(fd);
}
char
build(pat)
register char *pat;
{
unsigned int mask;
int i,j;
char *C_pat;
char *space;
char *malloc();
char startrange;
char endrange;
char first;
first = *pat;
/* pre process */
if (first == '.' || first == '[') {
fprintf(stderr,"%s %s\n",
"Can not start with wildcard \'.\'",
"or regular expression range");
exit(1);
}
/* Initialize the table */
for (i=0;i<MAXSYM;i++) {
T[i] = ~0;
}
/* allow for . wildcard */
space=malloc((strlen(pat)+1) * sizeof(char));
if (space == NULL) {
fprintf(stderr,"Malloc failed\n");
exit(1);
}
C_pat = space;
mask = ~0;
/* scan the pattern for ranges and or wildcards */
for(i=1;*pat;i <<= B) {
switch(*pat) {
case '[':
/*start of a range*/
pat++;
while(*pat != ']' && *pat) {
if (isalnum(*pat)) {
/* if nxt char is a dash */
/* it's a range */
if (*(pat+1) == '-') {
startrange = *pat;
pat++;
pat++;
endrange = *pat;
for(j=startrange;j<=endrange;j++){
T[j] &= ~i;
}
*C_pat = startrange;
} else {
/*normal character in range*/
*C_pat = *pat;
T[*pat] &= ~i;
}
} else if (*pat == '-') {
if (*(pat-1) == '[')
fprintf(stderr,
"Invalid Expression\n");
exit(1);
} else {
fprintf(stderr,
"Invalid Expression\n");
exit(1);
}
pat++;
} /* while not ']' */
C_pat++;
break;
case '.':
/* We need to set the char position to
* something special, in this case it doesn't
* really matter
*/
*C_pat++ = 'a';
mask &=~i;
break;
default:
*C_pat++ = *pat;
break;
} /* end of switch */
pat++;
} /* end of for loop */
/* close the string and rewind the ptr */
*C_pat = '\0';
C_pat = space;
if (strlen(C_pat) > MAXPAT) {
fprintf(stderr,
"Pattern too long max %d chars\n",WORD);
exit(1);
}
/* apply wildcard mask to all elements */
for (i=0;i<MAXSYM;i++) {
T[i] = T[i] & mask;
}
/* Set masks for all chars that occur */
/* in the pattern and calculate the */
/* match criteria */
stopmask=0;
for(j=1;*C_pat;j <<=B) {
T[*C_pat] &= ~j;
stopmask |=j;
C_pat++;
}
stopmask = ~(stopmask >>B);
free(space);
return(first);
}
matchpat(text,n,first)
register char *text;
int n;
char first;
{
register unsigned int state,initial;
int matches;
int gotoeol = 0;
register char *nl;
register char *end;
char *savepos;
char savechar;
end = text+n;
/* save the last character */
/* which we use as a sentinel */
/* for the while loop */
savepos = end+1;
savechar = *savepos;
*savepos = first;
/* search */
matches = 0;
initial = ~0;
do {
/* fast scan */
while(*text != first) text++;
if (text > end) continue;
state=initial;
do {
state= (state << B) | T[*text];
if (state <stopmask) {
/****** match **********/
nl=text;
/*look back for the closest newline*/
while(*--nl != '\n')
;
while(*++nl != '\n') putchar(*nl);
putchar('\n');
/* skip the rest of the line */
text=nl;
}
text++;
} while ( state != initial);
} while (text < end);
/* reset the character saved */
*savepos=savechar;
return(0);
}